import pandas as pd
import numpy as np

def _find_date_like_columns(df):
    """Return the object-dtype columns whose first non-null value parses as a date.

    Only one representative value per column is probed, so the result is a
    hint ("possible date columns"), not a guarantee that every value parses.
    """
    date_columns = []
    for col in df.columns:
        if df[col].dtype != 'object':
            continue

        # Probe the first *non-null* value: a missing first cell would be
        # converted to NaT without raising and falsely mark the column as a
        # date column, and an empty column has no first element at all.
        non_null = df[col].dropna()
        if non_null.empty:
            continue

        try:
            pd.to_datetime(non_null.iloc[0])
        except (ValueError, TypeError, OverflowError):
            # Value is not parseable as a date; this column is not date-like.
            continue
        date_columns.append(col)
    return date_columns


def check_data_structure(file_path: str = 'data/SPKC_Data_train.xlsx'):
    """Load an Excel data file and print a summary of its structure.

    The summary printed to stdout covers: shape, column names, dtypes, the
    first five rows, descriptive statistics, per-column missing-value counts,
    object columns that look like dates, numeric columns and object
    (categorical) columns.

    Args:
        file_path: Path of the Excel file to inspect. Defaults to the
            training data file that was previously hard-coded.

    Returns:
        The loaded ``pandas.DataFrame``, or ``None`` if reading or inspecting
        the file failed (the error message is printed instead of raised).
    """
    try:
        # Read the data file.
        print("正在读取新的数据文件...")
        df = pd.read_excel(file_path)

        print("\n数据文件基本信息:")
        print(f"数据形状: {df.shape}")
        print(f"列名: {list(df.columns)}")

        print("\n数据类型:")
        print(df.dtypes)

        print("\n前5行数据:")
        print(df.head())

        print("\n数据统计信息:")
        print(df.describe())

        print("\n缺失值统计:")
        print(df.isnull().sum())

        # Report object columns that look like dates.
        date_columns = _find_date_like_columns(df)
        if date_columns:
            print(f"\n可能的日期列: {date_columns}")

        # Report numeric columns.
        numeric_columns = df.select_dtypes(include=[np.number]).columns.tolist()
        print(f"\n数值列: {numeric_columns}")

        # Report categorical (object-dtype) columns.
        categorical_columns = df.select_dtypes(include=['object']).columns.tolist()
        print(f"\n分类列: {categorical_columns}")

        return df

    except Exception as e:
        # Top-level boundary of this diagnostic helper: report the problem and
        # signal failure with None rather than propagating the exception.
        print(f"读取数据时出错: {e}")
        return None

# Script entry point: run the structure check only when this file is executed
# directly. The result (the loaded DataFrame, or None on failure) is bound to
# the module-level name `df`.
if __name__ == "__main__":
    df = check_data_structure() 